직접 주소화 테이블(Direct Adress Table)

#include <iostream>

#include <cstdio>

using namespace std;

struct Node{

int key, value;

Node(){}

Node(int _key, int _value): key(_key), value(_value) {}

};

struct DirectAddress{

Node* table;

int key_sz;

DirectAddress(int _key_sz): key_sz(_key_sz){

this->table=new Node[key_sz];

for(int i=0; i<this->key_sz; ++i){

this->table[i].key=i;

this->table[i].value=0;

}

int search(int key){

if(this->table[key].value==0){

cout<<"No Key-Value"<<endl;

return 0;

}

return table[key].value;

}

void insertNode(int key, int data){

this->table[key].key=key;

this->table[key].value=data;

}

void insertNode(Node x){

this->table[x.key].key=x.key;

this->table[x.value].value=x.value;

}

void deleteNode(Node x){

this->table[x.key].value=0;

}

};

int main(void){

Node x1(0, 1), x2(1, 5), x3(2, 7);

DirectAddress dA(10);

dA.insertNode(x1);

dA.insertNode(x2);

dA.insertNode(x3);

cout<<"value x2's value: "<<dA.search(x2.key)<<'\n';

cout<<"erase x2:\n";

dA.deleteNode(x2);;

cout<<"value x2's value: "<<dA.search(x2.key)<<endl;

return 0;

}

위의 방법은 모든 배열에 대해 나열하는것과 크게 다르지 않음

전체 집합 U에 비해 키들의 집합 K가 상대적으로 작은 경우 많은 메모리가 낭비됨.

해시 테이블을 이용해 메모리를 효율적으로 사용할 수 있다.

Hashing을 이용할 경우, 두 개 이상의 키가 동일한 위치에 해싱될 수 있음(충돌)

Chaining Hash Table

#include <iostream>

using namespace std;

struct Node{

Node* next;

int key, value;

Node(int _key, int _value): key(_key), value(_value) {}

Node(int _key, int _value, Node* _next): key(_key), value(_value), next(_next){}

};

struct HashTable{

Node** table;

int hash_sz;

HashTable(int _hash_sz): hash_sz(_hash_sz){

table=new Node*[hash_sz];

for(int i=0; i<hash_sz; ++i) table[i]=nullptr;

}

int hashing(int key){

return key%hash_sz;

}

void Insert(int key, int value){

int hash_num=hashing(key);

if(table[hash_num]==nullptr){

table[hash_num]=new Node(key, value);

} else {

Node* cur=table[hash_num];

table[hash_num]=new Node(key, value, cur);

}

int Search(int key){

int hash_num=hashing(key);

Node* cur=table[hash_num];

if(cur==nullptr){

perror("NO KEY VALUE");

exit(1);

}

while(cur->key!=key){

if(cur->next==nullptr){

perror("NO KEY VALUE");

exit(1);

}

cur=cur->next;

}

return cur->value;

}

void Delete(int key){

int hash_num=hashing(key);

Node* cur=table[hash_num];

Node* tmp;

if(cur==nullptr){

perror("NO KEY VALUE");

exit(1);

}

while(cur->key!=key){

if(cur->next==nullptr){

perror("NO KEY VALUE");

exit(1);

}

tmp=cur;

cur=cur->next;

}

tmp->next=cur->next;

delete cur;

}

void print(void){

for(int i=0; i<hash_sz; ++i){

Node* cur=table[i];

while(cur!=nullptr){

cout<<cur->value<<", ";

cur=cur->next;

}

};

int main(void){

HashTable hash(5);

for(int i=0; i<20; ++i){

hash.Insert(i, i);

}

cout<<"Key 10's value: "<<hash.Search(10)<<endl;

hash.Delete(10);

cout<<"Delete key 10\n";

hash.print();

return 0;

}

Hash Function

- 나누기 방법(Division Method)

m으로 2의 지수승 값에 근접하지 않는 소수를 택하는 것이 좋음

- 곱하기 방법(Multiplication Method)

- 유니버설 해싱(Universal Hashing)

h(k)=k $h(k)=k$ mod m $m$ h(k)=⌊m(kA $h(k)=\lfloor m(kA$ mod 1)⌋ $1)\rfloor$ hab(k)=((ak+b) $h_{ab}(k)=((ak+b)$ mod p) $p)$ mod m $m$

개방 주소화 방법(Open-Addressing Method)

체이닝 기법을 사용하는 것이 아닌 해시 테이블 그 자체에 저장한다.

(해시 테이블의 적재률이 1을 넘길 수 없음)

포인터를 사용할 필요 없기 때문에 포인터(체이닝) 구현을 위한 메모리를 온전히 원소 저장에 사용 가능

단, 개방 주소화 방법은 원소 삭제에서 불리하기 때문에 키의 삭제가 빈번한 경우 체이닝 기법을 사용한다.

선형 조사 해싱의 중복 충돌을 해결하기 위해서 해싱값과 적당한 offset 떨어진곳에 연속으로 저장한다. 1차 군집 문제: h(k,i)=(h′(k)+i) $h(k, i)=(h'(k)+i)$ mod m $m$ 연속적으로 길게 저장된 데이터의 경우, 체증을 일으켜 평균 검색 시간이 증가하는 문제를 야기한다. 2차원 조사: h(k,i)=(h′(k)+c1i+c2i2) $h(k, i)=(h'(k)+c_1i+c_2i^2)$ mod m $m$ 중복 해싱: h(k,i)=(h1(k)+ih2(k)) $h(k, i)=(h_1(k)+ih_2(k))$ mod m $m$

완전 해싱(Perfect Hashing)

: 모든 키들이 서로 다른 해시번지를 가지는 해시 함수(충돌 X)

동록하는 키의 전체 집합을 알고 있는 경우, 구성 가능

두 단계의 해싱으로 구성된다.

첫 번째 해싱은 체이닝 해싱과 동일하다.

두 번째 해싱에서 연결 리스트를 만드는 것이 아닌 보조 해시 테이블 S를 이용한다.

해시 함수 h_1, h_2를 적절하게 선택함으로써 두 번째 해싱에서 충돌이 발생하지 않도록 보장할 수 있다.